The tidyverse, loaded with library(tidyverse), is a collection of useful packages, many of
which we have used before, such as dplyr or
ggplot2. Once you load the tidyverse package, you do not
need to load the other packages separately.
If the tidyverse installation failed, we can instead install and load the individual libraries dplyr, ggplot2, tibble, tidyr and pheatmap.
The months in the data are indexed by numbers, e.g. 1 means January, 2 means February, etc.
To create the heatmap, we need to convert the data into the wide format (columns are the months, rows are the years) using pivot_wider().
The column_to_rownames() function from the tibble library converts a specified column into the row names.
The colnames() function can be used to rename the columns.
## load tidyverse packages
library(tibble)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(pheatmap)
library(tidyr)
## load data
meantemp_germany <- read.csv(
  file = "~/PI/Lectures/R_course/3rd_cycle/02_heatmaps/meantemp_germany.csv",
  sep = ","
)

## modify data (wide format, all data needs to be numeric)
# Keep one row per year (from 1972 onwards) and one column per month.
# arrange() and names_sort = TRUE make the chronological row order and the
# 1..12 column order explicit instead of relying on the row order of the CSV
# file: the month labels below are assigned by position, and the gaps_row /
# gaps_col indices used in the heatmap calls depend on that order as well.
meantemp_germany_clean <- meantemp_germany %>%
  select(year, month, mean_temp_ger) %>%
  filter(year >= 1972) %>%
  arrange(year) %>%
  pivot_wider(
    names_from = month,
    values_from = mean_temp_ger,
    names_sort = TRUE
  ) %>%
  # move the year column into the row names so only numeric columns remain
  column_to_rownames("year")

# replace the month numbers by month names
# (month.name is base R's constant c("January", ..., "December"))
colnames(meantemp_germany_clean) <- month.name
# heatmap with all pheatmap defaults
pheatmap(mat = meantemp_germany_clean)

# switch off the default clustering of rows and columns
pheatmap(
  mat = meantemp_germany_clean,
  cluster_cols = FALSE,
  cluster_rows = FALSE
)

# additionally draw white borders around the cells
pheatmap(
  mat = meantemp_germany_clean,
  border_color = "white",
  cluster_cols = FALSE,
  cluster_rows = FALSE
)

# add gaps between columns and rows
# (gaps are inserted after the given column / row positions)
pheatmap(
  mat = meantemp_germany_clean,
  border_color = "white",
  cluster_cols = FALSE,
  cluster_rows = FALSE,
  gaps_row = c(9, 19, 29, 39, 49),
  gaps_col = c(3, 6, 9)
)
## add annotation
# create the annotation data frame: one row per month, holding its season.
# The row names are the month names so that they can be matched to the
# column names of the heatmap data.
season_df <- data.frame(
  season = c(
    "Winter", "Winter",
    "Spring", "Spring", "Spring",
    "Summer", "Summer", "Summer",
    "Autumn", "Autumn", "Autumn",
    "Winter"
  ),
  row.names = c(
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December"
  )
)

# heatmap with the season shown as a column annotation
# (TRUE/FALSE are spelled out because T and F are ordinary, reassignable variables)
pheatmap(
  meantemp_germany_clean,
  cluster_cols = FALSE,
  cluster_rows = FALSE,
  border_color = "white",
  gaps_col = c(3, 6, 9),
  gaps_row = c(9, 19, 29, 39, 49),
  annotation_col = season_df
)
# angle_col argument: change the label angle of the columns
pheatmap(
  meantemp_germany_clean,
  cluster_cols = FALSE,
  cluster_rows = FALSE,
  border_color = "white",
  gaps_col = c(3, 6, 9),
  gaps_row = c(9, 19, 29, 39, 49),
  annotation_col = season_df,
  angle_col = 45
)

# display_numbers argument: print the numeric value inside each cell
pheatmap(
  meantemp_germany_clean,
  cluster_cols = FALSE,
  cluster_rows = FALSE,
  border_color = "white",
  gaps_col = c(3, 6, 9),
  gaps_row = c(9, 19, 29, 39, 49),
  annotation_col = season_df,
  angle_col = 45,
  display_numbers = TRUE
)
# assign heatmap to a variable
# (the heatmap is still drawn while the object is being created)
plot <- pheatmap(
  meantemp_germany_clean,
  cluster_cols = FALSE,
  cluster_rows = FALSE,
  border_color = "white",
  gaps_col = c(3, 6, 9),
  gaps_row = c(9, 19, 29, 39, 49),
  annotation_col = season_df,
  angle_col = 45,
  display_numbers = TRUE
)

# save the plot as e.g. png; the file type is taken from the file extension
# and width / height are given in centimetres
ggsave(
  "~/Desktop/heatmap.png",
  plot = plot,
  width = 15,
  height = 20,
  units = "cm"
)
## change annotation colors
# create the color list: a named list with one entry per annotation column
# ("season"), each entry being a named vector that maps level -> color
season_color <- list(
  season = c(
    "Winter" = "blue",
    "Spring" = "green",
    "Summer" = "red",
    "Autumn" = "yellow"
  )
)

# heatmap with the custom season colors
pheatmap(
  meantemp_germany_clean,
  cluster_cols = FALSE,
  cluster_rows = FALSE,
  border_color = "white",
  gaps_col = c(3, 6, 9),
  gaps_row = c(9, 19, 29, 39, 49),
  annotation_col = season_df,
  annotation_colors = season_color
)
# skip every second row name to make the year axis less crowded
# The labels are derived from the row names (the years) instead of being typed
# out by hand: the 1st, 3rd, 5th, ... year is kept, every other one is blanked.
# For the rows 1972-2022 this yields "1972", "", "1974", "", ..., "2022".
years <- rownames(meantemp_germany_clean)
sparse_year_labels <- ifelse(seq_along(years) %% 2 == 1, years, "")

pheatmap(
  meantemp_germany_clean,
  cluster_cols = FALSE,
  cluster_rows = FALSE,
  border_color = "white",
  gaps_col = c(3, 6, 9),
  gaps_row = c(9, 19, 29, 39, 49),
  annotation_col = season_df,
  angle_col = 45,
  display_numbers = TRUE,
  labels_row = sparse_year_labels
)
# read the sequencing data from the csv file
seq_data_clean <- read.csv(
  "~/PI/Lectures/R_course/3rd_cycle/02_heatmaps/seq_data_clean.csv",
  sep = ","
)

# drop the unnecessary X column
seq_data_clean[["X"]] <- NULL

# move the gene names out of the data and into the row names
seq_data_clean <- column_to_rownames(seq_data_clean, var = "gene_name")

# draw the heatmap, scaling the values within each row
pheatmap(mat = seq_data_clean, scale = "row")